{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "21a7fcad-2154-4066-aba9-11b921dca3f4",
   "metadata": {},
   "source": [
    "Chapter 22\n",
    "# 鸢尾花数据帧groupby(['species']) 计算统计量\n",
    "Book_1《编程不难》 | 鸢尾花书：从加减乘除到机器学习  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cc60a6cc-5a77-4553-b3e2-0e93e0e2a0b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "89c9f685-14ee-41e8-9c05-ac7cba2ffab9",
   "metadata": {},
   "outputs": [],
   "source": [
    "iris_df = sns.load_dataset(\"iris\")\n",
    "# 从Seaborn中导入鸢尾花数据帧"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "7a0e1ae8-bc2e-4c93-b7f6-e0c25a7ffb2f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>setosa</th>\n",
       "      <td>5.006</td>\n",
       "      <td>3.428</td>\n",
       "      <td>1.462</td>\n",
       "      <td>0.246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>versicolor</th>\n",
       "      <td>5.936</td>\n",
       "      <td>2.770</td>\n",
       "      <td>4.260</td>\n",
       "      <td>1.326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>virginica</th>\n",
       "      <td>6.588</td>\n",
       "      <td>2.974</td>\n",
       "      <td>5.552</td>\n",
       "      <td>2.026</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            sepal_length  sepal_width  petal_length  petal_width\n",
       "species                                                         \n",
       "setosa             5.006        3.428         1.462        0.246\n",
       "versicolor         5.936        2.770         4.260        1.326\n",
       "virginica          6.588        2.974         5.552        2.026"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 分组计算统计量\n",
    "groupby_mean = iris_df.groupby(['species']).mean()\n",
    "groupby_mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ef657bd7-c3c4-44a8-967b-856eb85d65a8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>setosa</th>\n",
       "      <td>0.352490</td>\n",
       "      <td>0.379064</td>\n",
       "      <td>0.173664</td>\n",
       "      <td>0.105386</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>versicolor</th>\n",
       "      <td>0.516171</td>\n",
       "      <td>0.313798</td>\n",
       "      <td>0.469911</td>\n",
       "      <td>0.197753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>virginica</th>\n",
       "      <td>0.635880</td>\n",
       "      <td>0.322497</td>\n",
       "      <td>0.551895</td>\n",
       "      <td>0.274650</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            sepal_length  sepal_width  petal_length  petal_width\n",
       "species                                                         \n",
       "setosa          0.352490     0.379064      0.173664     0.105386\n",
       "versicolor      0.516171     0.313798      0.469911     0.197753\n",
       "virginica       0.635880     0.322497      0.551895     0.274650"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "groupby_std  = iris_df.groupby(['species']).std()\n",
    "groupby_std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "6e231124-d453-4905-90f6-4743cc3e0056",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>setosa</th>\n",
       "      <td>0.124249</td>\n",
       "      <td>0.143690</td>\n",
       "      <td>0.030159</td>\n",
       "      <td>0.011106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>versicolor</th>\n",
       "      <td>0.266433</td>\n",
       "      <td>0.098469</td>\n",
       "      <td>0.220816</td>\n",
       "      <td>0.039106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>virginica</th>\n",
       "      <td>0.404343</td>\n",
       "      <td>0.104004</td>\n",
       "      <td>0.304588</td>\n",
       "      <td>0.075433</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            sepal_length  sepal_width  petal_length  petal_width\n",
       "species                                                         \n",
       "setosa          0.124249     0.143690      0.030159     0.011106\n",
       "versicolor      0.266433     0.098469      0.220816     0.039106\n",
       "virginica       0.404343     0.104004      0.304588     0.075433"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "groupby_var  = iris_df.groupby(['species']).var()\n",
    "groupby_var"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b6f97fd9-941b-4f43-a7f7-f1d54f3665d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">setosa</th>\n",
       "      <th>sepal_length</th>\n",
       "      <td>0.124249</td>\n",
       "      <td>0.099216</td>\n",
       "      <td>0.016355</td>\n",
       "      <td>0.010331</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sepal_width</th>\n",
       "      <td>0.099216</td>\n",
       "      <td>0.143690</td>\n",
       "      <td>0.011698</td>\n",
       "      <td>0.009298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_length</th>\n",
       "      <td>0.016355</td>\n",
       "      <td>0.011698</td>\n",
       "      <td>0.030159</td>\n",
       "      <td>0.006069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_width</th>\n",
       "      <td>0.010331</td>\n",
       "      <td>0.009298</td>\n",
       "      <td>0.006069</td>\n",
       "      <td>0.011106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">versicolor</th>\n",
       "      <th>sepal_length</th>\n",
       "      <td>0.266433</td>\n",
       "      <td>0.085184</td>\n",
       "      <td>0.182898</td>\n",
       "      <td>0.055780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sepal_width</th>\n",
       "      <td>0.085184</td>\n",
       "      <td>0.098469</td>\n",
       "      <td>0.082653</td>\n",
       "      <td>0.041204</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_length</th>\n",
       "      <td>0.182898</td>\n",
       "      <td>0.082653</td>\n",
       "      <td>0.220816</td>\n",
       "      <td>0.073102</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_width</th>\n",
       "      <td>0.055780</td>\n",
       "      <td>0.041204</td>\n",
       "      <td>0.073102</td>\n",
       "      <td>0.039106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">virginica</th>\n",
       "      <th>sepal_length</th>\n",
       "      <td>0.404343</td>\n",
       "      <td>0.093763</td>\n",
       "      <td>0.303290</td>\n",
       "      <td>0.049094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sepal_width</th>\n",
       "      <td>0.093763</td>\n",
       "      <td>0.104004</td>\n",
       "      <td>0.071380</td>\n",
       "      <td>0.047629</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_length</th>\n",
       "      <td>0.303290</td>\n",
       "      <td>0.071380</td>\n",
       "      <td>0.304588</td>\n",
       "      <td>0.048824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_width</th>\n",
       "      <td>0.049094</td>\n",
       "      <td>0.047629</td>\n",
       "      <td>0.048824</td>\n",
       "      <td>0.075433</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         sepal_length  sepal_width  petal_length  petal_width\n",
       "species                                                                      \n",
       "setosa     sepal_length      0.124249     0.099216      0.016355     0.010331\n",
       "           sepal_width       0.099216     0.143690      0.011698     0.009298\n",
       "           petal_length      0.016355     0.011698      0.030159     0.006069\n",
       "           petal_width       0.010331     0.009298      0.006069     0.011106\n",
       "versicolor sepal_length      0.266433     0.085184      0.182898     0.055780\n",
       "           sepal_width       0.085184     0.098469      0.082653     0.041204\n",
       "           petal_length      0.182898     0.082653      0.220816     0.073102\n",
       "           petal_width       0.055780     0.041204      0.073102     0.039106\n",
       "virginica  sepal_length      0.404343     0.093763      0.303290     0.049094\n",
       "           sepal_width       0.093763     0.104004      0.071380     0.047629\n",
       "           petal_length      0.303290     0.071380      0.304588     0.048824\n",
       "           petal_width       0.049094     0.047629      0.048824     0.075433"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "groupby_cov  = iris_df.groupby(['species']).cov()\n",
    "groupby_cov"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b3bfa519-887e-4f42-901a-bb9110fe6810",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">setosa</th>\n",
       "      <th>sepal_length</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.742547</td>\n",
       "      <td>0.267176</td>\n",
       "      <td>0.278098</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sepal_width</th>\n",
       "      <td>0.742547</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.177700</td>\n",
       "      <td>0.232752</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_length</th>\n",
       "      <td>0.267176</td>\n",
       "      <td>0.177700</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.331630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_width</th>\n",
       "      <td>0.278098</td>\n",
       "      <td>0.232752</td>\n",
       "      <td>0.331630</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">versicolor</th>\n",
       "      <th>sepal_length</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.525911</td>\n",
       "      <td>0.754049</td>\n",
       "      <td>0.546461</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sepal_width</th>\n",
       "      <td>0.525911</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.560522</td>\n",
       "      <td>0.663999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_length</th>\n",
       "      <td>0.754049</td>\n",
       "      <td>0.560522</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.786668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_width</th>\n",
       "      <td>0.546461</td>\n",
       "      <td>0.663999</td>\n",
       "      <td>0.786668</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">virginica</th>\n",
       "      <th>sepal_length</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.457228</td>\n",
       "      <td>0.864225</td>\n",
       "      <td>0.281108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sepal_width</th>\n",
       "      <td>0.457228</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.401045</td>\n",
       "      <td>0.537728</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_length</th>\n",
       "      <td>0.864225</td>\n",
       "      <td>0.401045</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.322108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>petal_width</th>\n",
       "      <td>0.281108</td>\n",
       "      <td>0.537728</td>\n",
       "      <td>0.322108</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         sepal_length  sepal_width  petal_length  petal_width\n",
       "species                                                                      \n",
       "setosa     sepal_length      1.000000     0.742547      0.267176     0.278098\n",
       "           sepal_width       0.742547     1.000000      0.177700     0.232752\n",
       "           petal_length      0.267176     0.177700      1.000000     0.331630\n",
       "           petal_width       0.278098     0.232752      0.331630     1.000000\n",
       "versicolor sepal_length      1.000000     0.525911      0.754049     0.546461\n",
       "           sepal_width       0.525911     1.000000      0.560522     0.663999\n",
       "           petal_length      0.754049     0.560522      1.000000     0.786668\n",
       "           petal_width       0.546461     0.663999      0.786668     1.000000\n",
       "virginica  sepal_length      1.000000     0.457228      0.864225     0.281108\n",
       "           sepal_width       0.457228     1.000000      0.401045     0.537728\n",
       "           petal_length      0.864225     0.401045      1.000000     0.322108\n",
       "           petal_width       0.281108     0.537728      0.322108     1.000000"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "groupby_corr = iris_df.groupby(['species']).corr()\n",
    "groupby_corr"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
